/*  Note: The data file produced will be at the individual level and cover all ages, irrespective of the variables requested. 
 In this current version of the tool, it is not possible to select variables by wave - though you may edit the code below to do so. */

/****************************************************************************************
* Sample Code for your request:  3b620ca633234d53b8afc87bada06fce       *
*****************************************************************************************/
// Start from a clean session and stop Stata pausing with -more- prompts while the script runs
clear all
set more off

// Working folder: the temporary per-wave files (temp_a.dta, temp_b.dta, ...) created by this programme are saved here.
// Edit this path for your own machine, eg:  c:\ukhls\temp
cd "C:\Users\dbayliss\OneDrive - Health and Safety Executive\Stata_db" 

// Folder where the UKHLS data has been downloaded and unzipped; the wave files are read from here via $ukhls.
// Edit this path for your own machine, eg:  c:\ukhls_data\UKDA-6614-stata\stata\stata13_se\ukhls
global ukhls "C:\Users\dbayliss\OneDrive - Health and Safety Executive\Data\6614stata_A81DD7E55181F34C796A8FFD15CC671B699B510C19A799AB7AE8465472B3E6FC_V1\UKDA-6614-stata\stata\stata13_se\ukhls"

// Folder where the final long-format dataset produced by this programme is stored.
// Edit this path for your own machine, eg:  c:\ukhls\results
global outputpath "C:\Users\dbayliss\OneDrive - Health and Safety Executive\Stata_db"

// Name of the file produced by this programme. If you want to change the name, do it here.
local outputfilename "UKHLS_long_abcdefghijk"

// By default the data will be extracted from the waves whose letter prefixes are listed below, and combined.
// If you want a different selection of waves, make the change here (space-separated wave letters).
local allWaves = "a b c d e f g h i j k"

// Variables to take from the indall files (names WITHOUT the wave prefix; the prefix is added per wave further down).
// These include some key variables as determined by us PLUS any variables requested by you.
local indallvars "age_dv country ethn_dv gor_dv hhsize hidp mastat_dv nchild_dv pidp pno psnen01_lw psnen01_xw psnen91_lw psnen91_xw psnen99_xw psnenbh_lw psnenbh_xw psnenub_lw psnenub_xw psnenui_lw psnenui_xw psnenus_lw psnenus_xw psu racel_dv sex_dv strata urban_dv"

// Variables to take from the indresp files (names WITHOUT the wave prefix).
// These include some key variables as determined by us PLUS any variables requested by you.
local indvars "age_dv bendis1 bendis10 bendis12 bendis2 bendis3 bendis4 bendis5 bendis7 bendis8 bendis97 country depenth1 depenth2 depenth3 depenth4 depenth5 depenth6 ethn_dv fimngrs_dv fimnlabnet_dv fimnnet_dv fimnsben_dv gor_dv hhsize hhtype_dv hidp ind5mus_lw ind5mus_xw indbd91_lw indbdub_lw indin01_lw indin01_xw indin91_lw indin91_xw indin99_lw indinbh_xw indinub_lw indinub_xw indinui_lw indinui_xw indinus_lw indinus_xw indns91_lw indnsub_lw indpxbh_xw indpxub_lw indpxub_xw indpxui_lw indpxui_xw indpxus_lw indpxus_xw indscbh_xw indscub_lw indscub_xw indscui_lw indscui_xw indscus_lw indscus_xw jbsat jbsec jbsic07_cc jbsoc00 jbsoc10 jbstat jwbs1_dv jwbs2_dv mastat_dv nchild_dv nxtendreas7 pidp pno psu racel_dv reasend7 reasend7_1 reasend7_2 reasend7_3 reasend7_4 reasend7_5 reasend7_6 reasend7_7 reasend7_8 reasend7_9 scghq1_dv scghq2_dv sclfsat1 sclfsat2 sclfsato scsf1 scsf2a scsf2b scsf3a scsf3b scsf4a scsf4b scsf5 sex_dv sf1 sf12mcs_dv sf12pcs_dv strata tenure_dv ukborn urban_dv wkaut1 wkaut2 wkaut3 wkaut4 wkaut5"

// Variables to take from the child files (names WITHOUT the wave prefix).
// These include some key variables as determined by us PLUS any variables requested by you.
local chvars "age_dv country gor_dv hidp pidp pno psnen01_lw psnen91_lw psnenbh_lw psnenbh_xw psnenub_lw psnenub_xw psnenui_lw psnenui_xw psnenus_lw psnenus_xw psu sex_dv strata urban_dv"

// Variables to take from the hhresp files (names WITHOUT the wave prefix).
// These include some key variables as determined by us PLUS any variables requested by you.
local hhvars "country fihhmnnet1_dv gor_dv hhden01_xw hhden91_xw hhden99_xw hhdenbh_xw hhdenub_xw hhdenui_xw hhdenus_xw hhsize hhtype_dv hidp ieqmoecd_dv nkids_dv psu strata tenure_dv urban_dv"

// Variables to take from the youth files (names WITHOUT the wave prefix).
// These include some key variables as determined by us PLUS any variables requested by you.
local youthvars "age_dv country ethn_dv gor_dv hidp pidp pno psu racel_dv sex_dv strata urban_dv ythscbh_xw ythscub_xw ythscui_xw ythscus_xw"


/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Anything below this line should not be changed! Any changes to the selection of variables and waves, and location of folders, should be made above. //
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// getVars: add a wave prefix to every variable name in a list.
//   argument 1: space-separated list of variable names (may be empty)
//   argument 2: wave letter used as the prefix (e.g. a)
//   returns r(fixedVars): the same names, each as <prefix>_<name>, separated by single spaces;
//                         empty when argument 1 is empty.
// The list is processed token by token, so repeated, leading or trailing blanks in the
// input do not produce stray prefix-only entries such as "a_".
program define getVars, rclass
	version 14.0
	args varnames prefix
	local prefixed ""
	foreach name of local varnames {
		// unquoted macro assignment drops the leading blank on the first pass
		local prefixed `prefixed' `prefix'_`name'
	}
	return local fixedVars "`prefixed'"
end

// getExistingVars: filter a list of variable names down to those present in the data in memory.
//   argument 1: space-separated list of candidate variable names
//   returns r(existingVars): the candidates for which -confirm variable- succeeds, in input order
// NOTE(review): -confirm variable- is called without the exact option, so an unambiguous
// abbreviation of an existing variable may also be accepted - confirm this is intended.
program define getExistingVars, rclass
	version 14.0
	args candidates
	local found ""
	foreach candidate of local candidates {
		// -capture- suppresses the error for absent variables; _rc is 0 only when the name resolves
		capture confirm variable `candidate'
		if (_rc == 0) {
			local found "`found' `candidate'"
		}
	}
	return local existingVars "`found'"
end  

// Loop through each wave and build one temporary file (temp_<wave>.dta) per wave,
// holding the requested variables with the wave prefix removed.
foreach wave in `allWaves' {
	// wave number = position of the wave letter in the alphabet (a=1, b=2, ...)
	local waveno=strpos("abcdefghijklmnopqrstuvwxyz","`wave'")

	// wave-prefixed household variables
	getVars "`hhvars'" `wave'
	local wavehhvars "`r(fixedVars)'"
	
	// wave-prefixed individual (indresp) variables
	getVars "`indvars'" `wave'
	local waveindvars "`r(fixedVars)'"
	
	// wave-prefixed all-individual (indall) variables
	getVars "`indallvars'" `wave'
	local waveindallvars "`r(fixedVars)'"
	
	// wave-prefixed child variables
	getVars "`chvars'" `wave'
	local wavechvars "`r(fixedVars)'"
	
	// wave-prefixed youth variables
	getVars "`youthvars'" `wave'
	local waveyouthvars "`r(fixedVars)'"
	
	// open the household level file and keep the household id plus the requested variables that exist in this wave
	use "$ukhls/`wave'_hhresp", clear
	getExistingVars "`wave'_hidp `wavehhvars'"
	keep `r(existingVars)'
	
	// if only household variables are required, skip this part and return all households.
	// indallvars is part of the test so that a request for indall variables alone is not silently dropped.
	if ("`indallvars'" != "" || "`indvars'" != "" || "`chvars'" != "" || "`youthvars'" != "") {
		// if any individual variable is required, first merge INDALL to obtain pidp, so that the other files can merge on it
		merge 1:m `wave'_hidp using "$ukhls/`wave'_indall"
		drop _merge
		// drop loose households with no individuals
		drop if (pidp == .)
		
		// keep only variables that were requested and exist in this wave
		getExistingVars "pidp `wave'_hidp `wavehhvars' `waveindallvars'"
		keep `r(existingVars)'
		
		// full list of variables wanted once the person-level files have been merged in;
		// it is re-applied after each merge because every merge brings in all variables of the using file
		local keeplist "pidp `wave'_hidp `wavehhvars' `waveindvars' `waveyouthvars' `wavechvars' `waveindallvars'"
		
		// add any requested individual variables
		if ("`indvars'" != "") {
			merge 1:1 pidp using "$ukhls/`wave'_indresp"
			drop _merge
			// keep only variables that were requested and exist in this wave
			getExistingVars "`keeplist'"
			keep `r(existingVars)'
		}
		// add any requested youth variables
		if ("`waveyouthvars'" != "") {
			merge 1:1 pidp using "$ukhls/`wave'_youth"
			drop _merge
			// keep only variables that were requested and exist in this wave
			getExistingVars "`keeplist'"
			keep `r(existingVars)'
		}
		// add any requested child variables
		if ("`wavechvars'" != "") {
			merge 1:1 pidp using "$ukhls/`wave'_child"
			drop _merge
			// keep only variables that were requested and exist in this wave
			getExistingVars "`keeplist'"
			keep `r(existingVars)'
		}
	}

	// create a wave variable holding the wave number
	gen wavename=`waveno'

	// drop the wave prefix from all variables so names line up across waves when appended
	rename `wave'_* *

	// save the file that was created (in the current working folder)
	save temp_`wave', replace
	
}

// Split the wave list into the first wave and the remaining waves, so that the
// first wave's file is opened once and is NOT appended to itself again below
gettoken firstWave remainingWaves : allWaves

// open the file for the first wave
use temp_`firstWave', clear

// loop through the remaining waves appending them in the long format
foreach w of local remainingWaves {
	// append the files for the second wave onwards
	append using temp_`w'
}

// check how many observations are available from each wave
tab wavename

// move pidp to the beginning of the file
order pidp, first

// save the long file
save "$outputpath/`outputfilename'", replace

// erase temporary files
foreach w in `allWaves' {
	erase temp_`w'.dta
}
